# -*-coding:UTF-8-*-
# encoding:gbk
from asyncio import ensure_future
from asyncio.windows_events import NULL
import requests
from bs4 import BeautifulSoup
import sys
from pymysql.converters import escape_string

import json
# Python has the ijson plugin for JSON, but it did not feel comfortable to use,
# so the built-in json module plus a global variable is used instead.

import pymysql
from soupsieve import escape

# MySQL connection for the scraped data; database `shengjing` must already exist.
db = pymysql.connect(host='localhost', port=3306, user='root',
                     passwd='root', db='shengjing', charset='utf8')
cursor = db.cursor()

# Global dict that buffers the data of one major chapter while it is scraped.
data = {}

# Base path for the JSON output files.
# When copying a Windows path here, backslashes must be changed to forward slashes.
file_url = "D:/Desktop/biblical-reptile/sj_json"

# Base URL of the site to crawl.
web_url = "http://shengjing.55cha.com/"


def request(url, timeout=10):
    """Fetch *url* and return the response body as text.

    Parameters:
        url: absolute URL to fetch.
        timeout: seconds to wait for the server before giving up
            (``requests.get`` without a timeout can hang forever).

    Returns:
        The page HTML as ``str`` on HTTP 200, otherwise ``None``
        (non-200 status or any network error).
    """
    headers = {
        # Impersonate a desktop browser; the site may reject unknown agents.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko Core/1.70.3877.400 '
                      'QQBrowser/10.8.4506.400',
    }
    try:
        response = requests.get(url=url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response.text
        # Explicitly signal failure on non-200 instead of falling through.
        return None
    except requests.RequestException:
        return None


def save_to_json(soup, row, page):
    """Extract section titles and body text from *soup* and insert one
    `article` row for chapter *row*, page *page*.

    Side effects:
        - stores "tilteRow<page>" / "articleRow<page>" entries in the
          global ``data`` dict,
        - commits an INSERT on the shared connection (rolls back and
          prints the error on failure).

    Assumes ``page_save()`` has already populated ``data["menu"]``.
    """
    print(page)

    # Section headings live in <h3 class="pt"> elements.
    title_row = [span.string for span in soup.find_all('h3', class_='pt')]

    # Body paragraphs live in <p class="l180"> elements.
    article_row = soup.find_all('p', class_='l180')

    data["tilteRow" + str(page)] = str(title_row)
    data["articleRow" + str(page)] = str(article_row)

    print(data["menu"][0][page - 1])

    try:
        # Parameterized query: the driver escapes every value itself, so the
        # manual escape_string() pass is no longer needed and quotes in the
        # scraped text can no longer break the statement (SQL injection fix).
        sql = ("INSERT INTO `article` (`title`, `content`, `menu_id`, `secondlevelmenu_id`) "
               "VALUES (%s, %s, %s, %s)")
        cursor.execute(sql, (data["menu"][0][page - 1],
                             data["articleRow" + str(page)],
                             row, page))
        db.commit()
    except Exception as e:
        db.rollback()
        print(e)


def page_save(row):
    """Scrape the table of contents of chapter *row*.

    Fetches ``<web_url><row>.html``, collects the link text of every <li>
    inside <ul class="l4"> into ``data["menu"]``, and returns the number
    of entries in the last such list (the chapter's page count).

    Returns 0 (and an empty menu) when the download fails, instead of
    crashing on ``BeautifulSoup(None)`` / an unbound loop variable as the
    previous implementation did.
    """
    url = web_url + str(row) + '.html'
    html = request(url)
    if html is None:
        data["menu"] = ([],)
        return 0
    soup = BeautifulSoup(html, 'lxml')

    titles = []       # link text of every <li> across all matching menus
    last_count = 0    # number of <li> in the most recent <ul class="l4">
    for menu in soup.find_all('ul', class_='l4'):
        items = menu.find_all('li')
        last_count = len(items)
        for item in items:
            titles.append(item.find('a').string)

    # Stored as a 1-tuple on purpose: the rest of the script reads
    # data["menu"][0] (the original code's trailing comma created this).
    data["menu"] = (titles,)

    return last_count


def save(row):
    """Insert the collected menu of chapter *row* into `secondlevelmenu`
    and reset the global ``data`` buffer for the next chapter.

    The menu list is serialized as a JSON string into the `name` column;
    the chapter number is used for both `id` and `menu_id`. On failure
    the transaction is rolled back and the error printed.
    """
    global data
    print(data["menu"][0])
    try:
        # Parameterized query: the driver escapes the JSON payload, so
        # quotes in the menu text can no longer break the statement
        # (SQL injection fix over the previous %-interpolation).
        sql = ("INSERT INTO `secondlevelmenu` (`id`, `name`, `menu_id`) "
               "VALUES (%s, %s, %s)")
        menu_json = json.dumps(data["menu"][0],
                               default=lambda obj: obj.__dict__)
        cursor.execute(sql, (row, menu_json, row))
        db.commit()
    except Exception as e:
        db.rollback()
        print(e)

    # Reset the per-chapter buffer.
    data = {}


def main(row, page):
    """Download page *page* of chapter *row* and persist its content.

    Builds the page URL, fetches the HTML, parses it and hands the soup
    to save_to_json() for extraction and database insertion.
    """
    # Page URLs follow the pattern <base><chapter>_<page>.html
    page_url = '{}{}_{}.html'.format(web_url, row, page)
    html = request(page_url)
    parsed = BeautifulSoup(html, 'lxml')
    save_to_json(parsed, row, page)


if __name__ == '__main__':
    # # The block below tests a single chapter:
    # for rows in range(1, 2):
    #     page = page_save(rows)
    #     for pages in range(1, 5):
    #         main(rows, pages)
    #     save(rows)

    # The loop below crawls multiple chapters (1..66).
    for rows in range(1, 67):
        # Scrape the chapter's table of contents to compute the page count,
        # then (when the inner loop is enabled) crawl each page in turn.
        page = page_save(rows)
        # for pages in range(1, page+1):
        #     main(rows, pages)

        save(rows)

    # Close the cursor object.
    cursor.close()
    # Close the database connection.
    db.close()
